# you may need to install the packages
# install.packages("stringr")
# install.packages("plotly")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

# Getting a copy of the data

# Base URL of the course repository and the path of the CSV inside it.
github <- "https://raw.githubusercontent.com/ucb-stat133/stat133-fall-2018/master/"
datafile <- "data/mobile-food-sf.csv"

# Download the CSV into the working directory, then read it back keeping
# text columns as character vectors instead of factors.
csv_url <- paste0(github, datafile)
local_csv <- "mobile-food-sf.csv"
download.file(url = csv_url, destfile = local_csv)

dat <- read.csv(file = local_csv, stringsAsFactors = FALSE)

# Plots with plotly

# Number of records per day of the week, as a base R frequency table.
day_freqs <- table(dat$DayOfWeekStr)

# Static bar chart of the table (no bar borders, perpendicular axis labels).
barplot(height = day_freqs, las = 3, border = NA)

# The same frequencies as an interactive plotly bar chart.
plot_ly(type = "bar",
        x = names(day_freqs),
        y = day_freqs)

# Same tally built with dplyr: one row per day, sorted from the most to the
# least frequent day. Grouping by the column and summarising already keeps
# only the grouping column and the count.
day_counts <- dat %>%
  group_by(DayOfWeekStr) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

day_counts

# Bars in plotly's default order of the day names.
plot_ly(data = day_counts,
        type = "bar",
        x = ~DayOfWeekStr,
        y = ~count)

# Bars ordered by their count.
plot_ly(data = day_counts,
        type = "bar",
        x = ~reorder(DayOfWeekStr, count),
        y = ~count)

# Changing times

# A single opening time used to try out substring extraction.
time1 <- "10AM"

# hour: the first two characters
str_sub(time1, 1, 2)
## [1] "10"
# period: the last two characters (positions 3 and 4 of this 4-character string)
str_sub(time1, -2, -1)
## [1] "AM"
times <- c("12PM", "10AM", "9AM", "8AM", "2PM")

# subset hour
# The hour is one or two characters wide, so the end position is counted from
# the right: everything up to (but not including) the two-character period.
# The result is only printed; `times` itself is left untouched so that the
# period can still be extracted from it below.
str_sub(times, start = 1, end = -3)
## [1] "12" "10" "9"  "8"  "2"
# subset period: always the last two characters
str_sub(times, start = -2)
## [1] "PM" "AM" "AM" "AM" "PM"
# Reset the vector of opening times before deriving hours and periods from it.
times <- c("12PM", "10AM", "9AM", "8AM", "2PM")

# Deleting the AM/PM marker leaves just the hour.
str_replace(times, "AM|PM", "")
## [1] "12" "10" "9"  "8"  "2"

# Keep the hours in their own vector.
hours <- str_replace(times, "AM|PM", "")
hours
## [1] "12" "10" "9"  "8"  "2"

# The period is the final two characters of each time.
periods <- str_sub(times, -2, -1)

periods
## [1] "PM" "AM" "AM" "AM" "PM"
# Count how many times fall in each period; the resulting data frame has the
# columns `periods` (the period label) and `Freq` (its count).
a <- data.frame(table(periods))
a

# Name the trace type explicitly so plotly does not have to guess it (and
# print a "No trace type specified" message), and map the columns with
# formulas, like the other plot_ly() calls in this file.
plot_ly(a,
        x = ~periods,
        y = ~Freq,
        type = "bar")
# Parse the 12-hour labels ("%I" = hour on a 12-hour clock, "%p" = AM/PM) and
# print the hour on a 24-hour clock as two-digit strings.
parsed_times <- strptime(times, format = "%I %p")
start24 <- format(parsed_times, format = "%H")

start24
## [1] "12" "10" "09" "08" "14"

# Convert a vector of 12-hour clock labels to numeric 24-hour hours.
to_hour24 <- function(clock) {
  parsed <- strptime(clock, format = "%I %p")
  as.numeric(format(parsed, format = "%H"))
}

# Numeric start and end hours for every record, and their difference.
dat$start <- to_hour24(dat$starttime)

dat$end <- to_hour24(dat$endtime)

dat$duration <- dat$end - dat$start
dat

# Latitude and longitude coordinates

# One location string of the form "(latitude,longitude)".
loc1 <- "(37.7651967350509,-122.416451692902)"

# Remove only the opening parenthesis.
str_replace(loc1, "\\(", "")
## [1] "37.7651967350509,-122.416451692902)"
# Remove only the closing parenthesis.
str_replace(loc1, "\\)", "")
## [1] "(37.7651967350509,-122.416451692902"
# Remove both parentheses in one pass.
str_replace_all(loc1, "\\(|\\)", "")
## [1] "37.7651967350509,-122.416451692902"
lat_lon <- loc1 %>% str_replace_all("\\(|\\)", "")

# Deleting the comma glues the two numbers together ...
str_replace(lat_lon, ",", "")
## [1] "37.7651967350509-122.416451692902"
# ... whereas splitting on it yields latitude and longitude separately.
str_split(lat_lon, ",")
## [[1]]
## [1] "37.7651967350509"  "-122.416451692902"

# Manipulating more location values

# A few sample locations, including a missing value.
locs <- c(
  "(37.7651967350509,-122.416451692902)",
  "(37.7907890558203,-122.402273431333)",
  "(37.7111991003088,-122.394693339395)",
  "(37.7773000262759,-122.394812784799)",
  NA
)

# Strip the surrounding parentheses, leaving "latitude,longitude" strings.
lat_lon <- str_replace_all(locs, pattern = '\\(|\\)', replacement = '')

# Split on the comma that separates the two coordinates. Splitting on the
# minus sign instead would throw away the sign of the longitude (and would
# break for any coordinate pair with a different sign pattern).
lat_lon <- str_split(lat_lon, pattern = ',')
lat_lon
## [[1]]
## [1] "37.7651967350509"  "-122.416451692902"
## 
## [[2]]
## [1] "37.7907890558203"  "-122.402273431333"
## 
## [[3]]
## [1] "37.7111991003088"  "-122.394693339395"
## 
## [[4]]
## [1] "37.7773000262759"  "-122.394812784799"
## 
## [[5]]
## [1] NA
# First piece is the latitude, second the longitude; a missing location has
# no second piece, so indexing it gives NA.
lat <- vapply(lat_lon, function(x) x[1], character(1))
lon <- vapply(lat_lon, function(x) x[2], character(1))

lat <- as.numeric(lat)
lon <- as.numeric(lon)

# Strip the parentheses from every location, then split each
# "latitude,longitude" string on the comma. Splitting on the comma keeps the
# sign of each coordinate, so no manual sign correction is needed afterwards.
lat_long <- str_replace_all(dat$Location, pattern = '\\(|\\)', replacement = '')
lat_long <- str_split(lat_long, pattern = ',')

# First piece of every pair: latitude.
latitudes <- vapply(lat_long, function(x) x[1], character(1))
latitudes <- as.numeric(latitudes)

# Second piece of every pair: longitude (NA when the location is missing or
# has no second piece).
longitudes <- vapply(lat_long, function(x) x[2], character(1))
longitudes <- as.numeric(longitudes)

# Store the coordinates as numeric columns of the data frame.
dat$lat <- latitudes
dat$lon <- longitudes

# Plotting locations on a map

# Base graphics scatter plot of the coordinates: longitude on the x axis,
# latitude on the y axis, solid dots in a semi-transparent grey.
plot(x = dat$lon, y = dat$lat, col = "#77777744", pch = 19)

# The same scatter plot as an interactive plotly chart.
plot_ly(dat, x = ~lon, y = ~lat, mode = "markers", type = "scatter")
## Warning: Ignoring 40 observations

# Maps with RgoogleMaps

library(RgoogleMaps)

# Extent of the coordinates, ignoring missing values.
lat_range <- range(dat$lat, na.rm = TRUE)
lon_range <- range(dat$lon, na.rm = TRUE)

# Center of the map: mean latitude and mean longitude.
center <- c(mean(dat$lat, na.rm = TRUE), mean(dat$lon, na.rm = TRUE))

# Zoom value computed by MaxZoom() from the two coordinate ranges.
zoom <- min(MaxZoom(lat_range, lon_range))

# Download the San Francisco map image and draw the locations on top of it.
map1 <- GetMap(center = center, zoom = zoom, destfile = "san-francisco.png")

PlotOnStaticMap(map1, lat = dat$lat, lon = dat$lon, pch = 20, col = "#ed4964")

# Maps with ggmap

library(ggmap)

# skip this part (come back if you run into some error messages)
# (go back to a previous version of ggplot)
# The two installs below are deliberately commented out: left active, they
# would reinstall ggmap and downgrade ggplot2 from GitHub on every run of the
# script. Run them by hand only if the ggmap code further down fails.
# devtools::install_github("hadley/ggplot2@v2.2.0")
# devtools::install_github("dkahle/ggmap")

# add variables 'lat' and 'lon' to the data frame
dat$lat <- latitudes
dat$lon <- longitudes

# let's get rid of rows with missing values
# (na.omit() drops a row if ANY of its columns is NA, not just lat/lon)
dat <- na.omit(dat)

# ggmap typically asks you for a zoom level, 
# but we can try using ggmap's make_bbox function:
# (f = .1 is make_bbox's padding fraction around the coordinate range)
sbbox <- make_bbox(lon = dat$lon, lat = dat$lat, f = .1)
sbbox

# get a 'terrain' map
sf_map <- get_map(location = sbbox, maptype = "terrain", source = "google")

# draw the map and overlay each location as a small, mostly transparent
# red point
ggmap(sf_map) + 
  geom_point(data = dat, 
             mapping = aes(x = lon, y = lat), 
             color = "red", alpha = 0.2, size = 1)

# Looking for specific types of food

# Peek at the first three free-text menu descriptions.
dat[["optionaltext"]][seq_len(3)]
## [1] "Tacos, Burritos, Tortas, Quesadillas, Mexican Drinks, Aguas Frescas"   
## [2] "Cold Truck: sandwiches, drinks, snacks, candy, hot coffee"             
## [3] "Cold Truck: Pre-packaged Sandwiches, Various Beverages, Salads, Snacks"
# Keep the first ten descriptions as a small sample to experiment on.
foods <- dat[["optionaltext"]][seq_len(10)]
foods
##  [1] "Tacos, Burritos, Tortas, Quesadillas, Mexican Drinks, Aguas Frescas"                                                                                                                                                                                                                                                                      
##  [2] "Cold Truck: sandwiches, drinks, snacks, candy, hot coffee"                                                                                                                                                                                                                                                                                
##  [3] "Cold Truck: Pre-packaged Sandwiches, Various Beverages, Salads, Snacks"                                                                                                                                                                                                                                                                   
##  [4] "Hot Dogs, Hamburgers, Nachos, Steaks, Pastas, Asian Dishes, Tri-Tip Sandwiches, Sodas & Water"                                                                                                                                                                                                                                            
##  [5] "Cold Truck: Sandwiches, fruit, snacks, candy, hot and cold drinks"                                                                                                                                                                                                                                                                        
##  [6] "Cold Truck: Cheeseburgers, Burgers, Chicken Bake, Chili Dogs, Hot Dogs, Corn Dogs, Cup of Noodles, Egg Muffins, Tamales, Hot Sandwiches Quesadillas, Gatorade, Juice, Soda, Mikl, Coffee, Hot Cocoa, Hot Tea, Flan, Fruits, Fruit Salad, Yogurt, Candy, Chips,  Donuts, Cookies, Granola, Muffins & Various Drinks & Pre-Packaged Snacks."
##  [7] "Cold Truck: sandwiches, drinks, snacks, candy, hot coffee"                                                                                                                                                                                                                                                                                
##  [8] "Cold Truck: Pre-packaged Sandwiches, Various Beverages, Salads, Snacks"                                                                                                                                                                                                                                                                   
##  [9] "Cold Truck: Sandwiches, fruit, snacks, candy, hot and cold drinks"                                                                                                                                                                                                                                                                        
## [10] "Cold Truck: Pre-packaged sandwiches, snacks, fruit, various beverages"
# Which sample descriptions mention burritos, in either capitalisation?
str_detect(foods, pattern = "Burritos|burritos")
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Which mention tacos or quesadillas? The menu text capitalises these words
# ("Tacos", "Quesadillas"), so a lower-case pattern matches nothing; match
# case-insensitively instead.
str_detect(foods, pattern = regex("tacos|quesadillas", ignore_case = TRUE))
##  [1]  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
# Draw a set of locations on a Google static map that is centered on, and
# zoomed to fit, their coordinates.
#
#   trucks    data frame with numeric `lat` and `lon` columns
#   destfile  file the downloaded map image is written to
#
# Returns the map object created by GetMap(), invisibly. The center and zoom
# values are local to this function.
plot_truck_map <- function(trucks, destfile = "san-francisco.png") {
  # An empty selection has no center or range to compute.
  if (nrow(trucks) == 0) {
    stop("no rows to plot", call. = FALSE)
  }

  # coordinates for center of the map
  center <- c(mean(trucks$lat, na.rm = TRUE), mean(trucks$lon, na.rm = TRUE))

  # zoom value
  zoom <- min(MaxZoom(range(trucks$lat, na.rm = TRUE),
                      range(trucks$lon, na.rm = TRUE)))

  # san francisco map
  map <- GetMap(center = center, zoom = zoom, destfile = destfile)

  PlotOnStaticMap(map, trucks$lat, trucks$lon, col = "#ed4964", pch = 20)
  invisible(map)
}

# Burritos
# (subset() keeps the rows whose condition is TRUE, so no `== TRUE` is needed)
burritos <- subset(dat, str_detect(dat$optionaltext, pattern = "Burritos|burritos"))
burritos
map1 <- plot_truck_map(burritos)

# Tacos
tacos <- subset(dat, str_detect(dat$optionaltext, pattern = "Tacos|tacos"))
map1 <- plot_truck_map(tacos)

# Split every menu description into its comma-separated items and store them
# as a list column. Items after the first start with a space in the raw text
# ("Tacos, Burritos"), so trim the whitespace around each one.
dat$types <- lapply(str_split(dat$optionaltext, pattern = ","), str_trim)


# Tried faceting but could not figure it out